import scrapy
from urllib.parse import quote


class PhotospiderSpider(scrapy.Spider):
    """Collect photo URLs from the table on the spider's start page.

    Every table row that carries an image in its 12th cell produces one
    item with two keys:

    * ``"URL"``    -- absolute, percent-encoded address of the image.
    * ``"number"`` -- zero-based position of the row inside the table.
    """

    name = 'PhotoSpider'
    allowed_domains = ['sport.cqupt.edu.cn']
    start_urls = ['http://sport.cqupt.edu.cn']

    # Scheme + host that is prepended to the image paths found in the table.
    photo_host = "http://202.202.43.198"

    def parse(self, response):
        """Yield one ``{"URL": ..., "number": ...}`` dict per table row.

        Args:
            response: The downloaded start page.

        Yields:
            dict: A fresh dict for every row that has an image. Rows without
            an ``<img>`` in column 12 are logged and skipped; they still
            consume their row number, so ``"number"`` always matches the
            row's position in the table.
        """
        rows = response.xpath("//*[@id='bootstrap-table']/tbody/tr")
        for number, row in enumerate(rows):
            src = row.xpath(".//td[12]/img/@src").extract_first()
            if src is None:
                # extract_first() returns None when nothing matches; without
                # this guard the string concatenation would raise TypeError
                # and abort the remaining rows.
                self.logger.warning(
                    "Row %d has no image in column 12; skipping", number
                )
                continue
            # Build a new dict for each row: yielding one shared, mutated
            # dict lets later rows overwrite items that pipelines or
            # exporters are still holding on to.
            yield {"URL": self._build_photo_url(src), "number": number}

    def _build_photo_url(self, src):
        """Return the absolute, percent-encoded URL for image path *src*."""
        url = self.photo_host + src
        # Handle Chinese characters and spaces in the URL.
        # The first space becomes "%20" and every remaining space becomes
        # "_".
        # NOTE(review): presumably this mirrors how the server names its
        # files -- confirm before changing.
        url = url.replace(" ", "%20", 1)
        url = url.replace(" ", "_")
        # URL delimiters and "%" are listed as safe so the structure of the
        # address and the "%20" inserted above are not encoded a second
        # time; everything else outside the unreserved set (e.g. Chinese
        # characters) is percent-encoded.
        return quote(url, safe=":;/?@&=+#,%_.")